PsL Monthly 1993 December

home *** CD-ROM | disk | FTP | other *** search

/ PsL Monthly 1993 December / PSL Monthly Shareware CD-ROM (December 1993).iso / prgmming / dos / pascal / rle.pas < prev next >

Wrap

Pascal/Delphi Source File | 1993-02-03 | 17KB | 479 lines

{$D+,L+} {debug info} {$M 8192,0,655360} {stack size, heapmin, heapmax} PROGRAM RunLengthEncode; { Author: Ken Murphy, CIS 74025,731 Date Written: January 1993 This program implements a Run-Length-Encoding algorithm to compress files. I wrote this to test the decompression speed and compression effect on files containing printer graphics. These files tend to be large with long series repetitive characters. RLE is a bad compression choice if the file to be compressed does not have runs of repetitive bytes. The algorithm gains it's compression from reducing a series of the same byte (up to 127 of them) to 2 bytes. Tests have proven that this RLE decompression is very fast, and it's OK to include this technique within other applications that employ files that can benefit. Tests on my dot matrix printer graphic data files show compressions of about 65% of original size. LZHuf compresses better, but this is faster and simpler to develop for my needs. The program has 3 command-line inputs in the order given: 1. Mode: C for compress or D for Decompress 2. Input file name 3. Output file name. The encoded data file is a series of strings. Each string begins with a "prefix" byte. The leftmost bit of the byte distinguishes between the two types of strings. The remaining 7 bits in the prefix byte are a length value. The prefix byte is followed by one or more data bytes. The two types of compressed strings are "RUN" and "LEN". The RUN variety allows for sequences of identical bytes. Generally, the maximum compression effect results from these. The LEN variety accomodates sequences of data bytes that are not the same. Here's what it looks like in a compressed file: If the leftmost bit of the prefix byte is 1, the string is a "RUN" string. The remaining 7 bits of the prefix byte are the replication count value for the following data byte. Therefore, a RUN string is always 2 bytes. If the leftmost bit of the prefix byte is 0, the string is a "LENGTH" string. The remaining 7 bits of the prefix byte are the number of data bytes following the prefix byte. Since the length value in the prefix byte is limited to 7 bits, a RUN or a LENGTH can be no larger that 127 ($01 - $FF). I don't permit a length value of 0. PASCAL will handle this type of byte as a SHORTINT. Remember that the actual size of a RUN segment is always 2 bytes. A LEN segment never gets larger than 128 bytes, i.e. 1 prefix byte and 127 (max) data bytes. There's performance instrumentation kept and displayed so I can measure the algorithm and how I've coded it. The program was developed in Borland Pascal 7.0 for a REAL target. It uses TurboPower's Object Professional, but there's no "Object Oriented" programming here. There's not much in the way of "fancy" so it should be easy to modify to vanilla Pascal. Also, consider this a blatant advertisement and recommendation for TurboPower's Object Professional library although I'm in no way affiliated with them.} USES DOS, OPCrt, OPString; CONST MaxChars = 127; EndOfFile = 100; BufSize = 255; TYPE RLE = RECORD RepeatCount : shortint; Data : array[1..MaxChars] OF char; END; RLE_Rec_String= array[0..MaxChars] OF char; SegmentTypes = (RUN, LEN); TimeRecord = RECORD Hour : Word; Minute : Word; Second : Word; Sec100 : Word; END; CONST {typed CONST} ReadResult : word = 0; WriteCount : word = 0; Segments : longint = 0; TotalBytesIn : longint = 0; TotalBytesOut : longint = 0; EndInput : boolean = false; DisplayPrefix : string[2] = ' '; InputFileSize : real = 0; LastIBuffer : boolean = false; LastOBuffer : boolean = false; OBufferIndex : integer = BufSize+1; OBuffer : string[BufSize] = ''; IBuffer : string[BufSize] = ''; IBufferIndex : integer = Bufsize+1; VAR SegmentType : SegmentTypes; RLE_Record : RLE; RLE_String : RLE_Rec_String absolute RLE_Record; CurrentChar : char; LastChar : char; InputFile : FILE; OutputFile : FILE; CompressMode : boolean; InputFileName : string; OutputFileName: string; OldPct : string[7]; Started : TimeRecord; Stopped : TimeRecord; {*************************************************************} PROCEDURE ShowProgress; VAR Percent : string[7]; {show the user what percentage of the input we've done - eye candy} BEGIN Str((TotalBytesIn * 100) / InputFileSize : 6 : 1, Percent); IF OldPct <> Percent THEN {this speeds things a bit} BEGIN OldPct:=Percent; FastText('Percentage Completed = ' + Percent, 6, 2); END; END; {ShowProgress} {**************************************************************} PROCEDURE ReadInput (CONST ByteCount : shortint; VAR ReadString : RLE_Rec_String); {feed BYTECOUNT bytes from the input file. A string buffer is used to hold the file's data to minimize the I/O time spent in the file. The output is a 0-index-based char array.} VAR BCount : shortint; BEGIN IF IBufferIndex+ByteCount-1<=ReadResult THEN {more data in buffer?} BEGIN IF ByteCount=1 THEN ReadString[0]:=IBuffer[IBufferIndex] ELSE Move (IBuffer[IBufferIndex],ReadString[0],ByteCount); IBufferIndex := IBufferIndex + ByteCount; END ELSE BEGIN {no, get more from file} IF LastIBuffer THEN EndInput:=true {last buffer was delivered} ELSE BEGIN {read another block} BCount:=ByteCount; {if there's anything in the buffer, output the remainder as the 1st chunk of the data requested} IF IBufferIndex<=BufSize THEN {was the entire buffer used} BEGIN {no, there's more in there to give} IF IBufferIndex=BufSize THEN ReadString[0]:=IBuffer[BufSize] ELSE Move(IBuffer[IBufferIndex],ReadString[0],Length(IBuffer)-IBufferIndex+1); BCount:=BCount-(BufSize-IBufferIndex+1); END; {buffer index <= buffer size} {$I-} BlockRead (InputFile, IBuffer[1], BufSize, ReadResult); {$I+} IF (IOResult = EndOfFile) OR (ReadResult<BufSize) THEN LastIBuffer:=true; {last buffer has been input} IF ReadResult>0 THEN BEGIN IBuffer[0]:=Chr(ReadResult); {set length of string} IF BCount=1 THEN {Asking for only 1 byte} ReadString[ByteCount-BCount]:=IBuffer[1] {yes, faster than 1 byte MOVE} ELSE Move(IBuffer[1],ReadString[ByteCount-BCount],BCount); TotalBytesIn:=TotalBytesIn + ReadResult; IBufferIndex := BCount+1; END; {result>0} END; {input another block} END; {else} END; {ReadRaw} {*************************************************************} PROCEDURE Compress; {Compress the input file by reading it one byte at a time and comparing that byte to the one previously read.} {================================================} PROCEDURE NewSegment (CONST StringType : SegmentTypes); {set the appropriate prefix type; write the string segment and adjust the statistics counters. Lastly insert the latest byte and reset the prefix count to 1} BEGIN CASE StringType OF LEN : BEGIN {prefix byte remains positive < 128} BlockWrite (OutputFile, RLE_Record, RLE_Record.RepeatCount + 1); TotalBytesOut:=TotalBytesOut + RLE_Record.RepeatCount + 1; END; {LEN} RUN : BEGIN {flip the sign of the prefix byte} RLE_Record.RepeatCount:=RLE_Record.RepeatCount * -1; BlockWrite (OutputFile, RLE_Record, 2); TotalBytesOut:=TotalBytesOut + 2; END; {RUN} END; {CASE} Segments:=Segments+1; RLE_Record.Data[1]:=CurrentChar; {set new byte to compare} RLE_Record.RepeatCount:=1; {only one of them so far} END; {NewSegment} {================================================} FUNCTION ReadRaw : char; {feed one byte from the input file. A string buffer is used to hold the file's data to minimize the I/O time spent in the file.} BEGIN IF IBufferIndex<=ReadRes